The code chunk below installs (if missing) and loads the required packages into the R session.
# Packages required by this analysis, in the order they are attached.
# (The original list named 'lubridate' twice; the duplicate was a no-op.)
packages <- c(
  "tidyverse", "treemap", "ggrepel", "lubridate", "gapminder", "gganimate",
  "ggiraph", "plotly", "zoo", "tmap", "sf", "trelliscopejs", "hrbrthemes",
  "transformr", "clock", "sftime", "rmarkdown", "data.table"
)

for (p in packages) {
  # requireNamespace() only checks availability; library() below does the
  # attaching and stops with an error if installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
The code chunk below imports Employers.csv,
Buildings.csv, Jobs.csv, CheckinJournal.csv
and Participants.csv from the data folder into R by using
read_csv() and saves each of them as a tibble data frame.
# Load the raw tables from the data folder; read_csv() returns tibbles.
Employers    <- read_csv(file = "data/Employers.csv")
Buildings    <- read_csv(file = "data/Buildings.csv")
Jobs         <- read_csv(file = "data/Jobs.csv")
Checkin      <- read_csv(file = "data/CheckinJournal.csv")
Participants <- read_csv(file = "data/Participants.csv")
# Rename venueId to employerId so check-ins can later be merged with Employers.
# The rename applies to every check-in row; the id is only used as an employer
# id for the rows kept by the "Workplace" filter below.
Checkin <- Checkin %>%
  rename(employerId = venueId)

# Extract the calendar date from the timestamp
Checkin$Date <- as.Date(Checkin$timestamp)

# Keep only the rows whose venueType matches "Workplace"
Workplace_Checkin <- Checkin %>%
  filter(grepl("Workplace", venueType))

# Assign a running week number: days 0-6 after 2022-03-01 are week 1,
# days 7-13 are week 2, and so on.
# The arithmetic is done purely on Dates. The previous version compared a Date
# with a local-time POSIXlt from strptime(), so the result of ceiling()
# depended on the session time zone (e.g. in UTC, 2022-03-01 became week 0 and
# every week boundary was shifted by one day).
Workplace_Checkin <- Workplace_Checkin %>%
  mutate(
    Week_Num = as.numeric(
      difftime(Date, as.Date("2022-03-01"), units = "days")
    ) %/% 7 + 1
  )
# Count the distinct participants that checked in at each employer in each week
Count_Checkin <- Workplace_Checkin %>%
  group_by(Week_Num, employerId) %>%
  summarise(Num_of_Employees = n_distinct(participantId), .groups = "drop")

# Percentage change in head count versus the employer's previous recorded week.
# - order_by makes the "previous week" explicit instead of relying on the row
#   order left behind by summarise().
# - The first recorded week of each employer has no predecessor (NA); it is
#   reported as 0. Only Perc_Chg is touched, rather than overwriting NAs in
#   every column.
# - "Previous" means the previous week that has any check-ins for the employer,
#   which is not necessarily the calendar week immediately before.
Count_Checkin <- Count_Checkin %>%
  group_by(employerId) %>%
  mutate(
    Perc_Chg = round(
      (Num_of_Employees - lag(Num_of_Employees, order_by = Week_Num)) /
        lag(Num_of_Employees, order_by = Week_Num) * 100,
      2
    ),
    Perc_Chg = coalesce(Perc_Chg, 0)
  ) %>%
  ungroup()
# Attach the employer attributes to the weekly counts (inner join on employerId)
Count_Checkin <- merge(Count_Checkin, Employers, by = "employerId")

# Round-trip through a CSV file so that read_sf() can build the geometry from
# the `location` column. write_csv() does not create missing folders, so make
# sure the output directory exists first.
dir.create("data/csv", recursive = TRUE, showWarnings = FALSE)
write_csv(Count_Checkin, "data/csv/Count_Checkin.csv")
Count_Checkin_sf <- read_sf("data/csv/Count_Checkin.csv",
                            options = "GEOM_POSSIBLE_NAMES=location")

# Convert the columns used for plotting back to numeric after the CSV read
# (presumably they come back as text from the CSV driver -- TODO confirm).
Count_Checkin_sf$Num_of_Employees <- as.numeric(Count_Checkin_sf$Num_of_Employees)
Count_Checkin_sf$Week_Num <- as.numeric(Count_Checkin_sf$Week_Num)
# Count the distinct employers each participant checked in with per week and
# keep only the participant-weeks that involve more than one employer
Change_Job <- Workplace_Checkin %>%
  group_by(participantId, Week_Num) %>%
  summarise(Num_of_Employers = n_distinct(employerId)) %>%
  ungroup() %>%
  filter(Num_of_Employers > 1)

# Add the participant attributes to each remaining row
Change_Job <- merge(x = Change_Job, y = Participants, by = "participantId")
# Read buildings and employers as sf objects, taking the geometry from the
# `location` column of each CSV file
buildings <- read_sf(
  "data/Buildings.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
employers <- read_sf(
  "data/Employers.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)

# Add the building attributes to every employer row (all employers are kept)
Employers <- left_join(Employers, Buildings, by = "buildingId")
Next, we use a full outer join on Employers and Jobs based
on employerId to get a full overview of all the jobs that
are offered by each employer.
# Full outer join: keep every employer and every job, matched on employerId
Employers <- merge(
  x = Employers,
  y = Jobs,
  by = "employerId",
  all = TRUE
)
# Wrap the data in a shared key so the plot and the table below are linked
d <- highlight_key(Change_Job)

# Bar chart of the number of employers per participant-week, one facet per
# education level
p1 <- ggplot(d, aes(x = as.factor(Num_of_Employers), fill = educationLevel)) +
  geom_bar() +
  facet_wrap(~educationLevel) +
  ggtitle("Participants with >1 Employers") +
  xlab("Numbers of Employers") +
  ylab("No. of\nParticipants") +
  theme(
    axis.title.y = element_text(angle = 0),
    axis.ticks.x = element_blank(),
    axis.line = element_line(color = "grey")
  )

# Convert to plotly and highlight on box/lasso selection
gg <- highlight(ggplotly(p1), "plotly_selected")

# Lay out the chart and the data table as two full-width rows.
# `class = "display"` is a DT::datatable() argument; it was previously passed
# to bscols(), where it was treated as a third piece of content.
crosstalk::bscols(
  gg,
  DT::datatable(d, rownames = FALSE, class = "display"),
  widths = c(12, 12)
)
# Interactive map: building polygons, employer locations as red dots, and
# bubbles sized by the number of employees that checked in
tmap_mode("view")

# NOTE(review): `size` and `border.lwd` do not appear to be documented
# tm_polygons() arguments (the border width argument is `lwd`) -- confirm they
# have the intended effect.
tm_shape(buildings) +
  tm_polygons(
    col = "grey",
    size = 1,
    border.col = "black",
    border.lwd = 1
  ) +
  tm_shape(employers) +
  tm_dots(col = "red") +
  tm_shape(Count_Checkin_sf) +
  tm_bubbles(
    size = "Num_of_Employees",
    col = "lightblue"
  )
# Animated map: buildings as the base layer, with employer points coloured and
# sized by the number of employees, stepping through the weeks one at a time
ggplot() +
  geom_sf(data = buildings) +
  geom_sf(
    data = Count_Checkin_sf,
    mapping = aes(color = Num_of_Employees, size = Num_of_Employees),
    alpha = 0.3
  ) +
  scale_color_distiller(
    palette = "YlOrRd",
    trans = "reverse"
  ) +
  theme_void() +
  transition_states(
    Week_Num,
    transition_length = 0.5,
    state_length = 2
  )
yu